In [249]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt
import kagglehub
from pathlib import Path

path = kagglehub.dataset_download("usaf/world-war-ii")
aerial = pd.read_csv ((Path (path) / Path("operations.csv")))


aerial.to_csv("operations.csv")
Warning: Looks like you're using an outdated `kagglehub` version, please consider updating (latest version: 0.3.5)
C:\Users\Arath\AppData\Local\Temp\ipykernel_23028\4267720121.py:11: DtypeWarning: Columns (7,8,11,13,31,43,44) have mixed types. Specify dtype option on import or set low_memory=False.
  aerial = pd.read_csv ((Path (path) / Path("operations.csv")))
  1. Which day had most missions?
In [250]:
aerial_mission_date = aerial.groupby("Mission Date")["Mission ID"].count()
aerial_mission_date_df = aerial_mission_date.reset_index()
aerial_mission_date_df.columns = ["Mission Date", "Number of Missions"]
pd.DataFrame(aerial_mission_date_df).sort_values(by = "Number of Missions", ascending = False)
Out[250]:
Mission Date Number of Missions
846 3/24/1945 896
948 4/15/1945 874
988 4/22/1945 862
928 4/11/1945 838
923 4/10/1945 778
... ... ...
1824 9/25/1939 1
79 1/24/1941 1
833 3/22/1942 1
1731 8/9/1941 1
382 11/24/1939 1

1899 rows × 2 columns

In [251]:
# Ensure "Mission Date" is in datetime format
aerial["Mission Date"] = pd.to_datetime(aerial["Mission Date"])
aerial["Year-Month"] = aerial["Mission Date"].dt.to_period("M")
print (aerial["Year-Month"])
missions_per_month = aerial.groupby("Year-Month")["Mission ID"].count()
missions_per_month_df = missions_per_month.reset_index()
missions_per_month_df.columns = ["Month", "Number of Missions"]
missions_per_month_df = missions_per_month_df.sort_values(by="Number of Missions", ascending=False)
pd.DataFrame (missions_per_month_df.head(10))
0         1943-08
1         1943-08
2         1943-08
3         1943-08
4         1943-08
           ...   
178276    1945-02
178277    1945-02
178278    1945-02
178279    1945-02
178280    1945-02
Name: Year-Month, Length: 178281, dtype: period[M]
Out[251]:
Month Number of Missions
63 1945-04 14354
62 1945-03 13455
53 1944-06 9550
55 1944-08 9379
54 1944-07 8672
61 1945-02 8094
59 1944-12 7752
58 1944-11 7432
56 1944-09 7379
52 1944-05 7332
In [301]:
import pandas as pd

aerial_country = aerial.dropna(subset=["Country", "Target Country"])  

if isinstance(aerial, pd.DataFrame):
    print("This is a DataFrame!")
else:
    print("Not a DataFrame!")
allies_vs_axis = aerial_country.groupby(["Country", "Target Country"])["Mission ID"].count()
allies_vs_axis = allies_vs_axis.reset_index()
allies_vs_axis
allies_vs_axis = allies_vs_axis.pivot(index="Country", columns="Target Country", values="Mission ID").fillna(0)
allies_vs_axis

plt.figure(figsize=(18, 5))
allies_vs_axis = allies_vs_axis.astype(int)
ax = sns.heatmap (
allies_vs_axis,
vmin=0,
vmax=4000,
fmt="d",
cmap="plasma",
annot_kws={"size": 7},
annot=True)
plt.yticks(rotation=0, fontsize=8)
plt.xticks(fontsize=8)
ax.xaxis.tick_top()
plt.xticks(fontsize=8, rotation=90)
# Rotate numbers vertically
for text in ax.texts:           # Loop through all annotation texts
    text.set_rotation(90)       # Set text to vertical orientation
This is a DataFrame!
No description has been provided for this image
In [235]:
aerial = aerial[pd.isna(aerial.Country)==False]
In [236]:
import geopandas as gpd
import matplotlib.pyplot as plt
from pathlib import Path
import re

longitude_min, longitude_max = -180, 180  # Longitude range
latitude_min, latitude_max = -90, 90     # Latitude range
# Drop rows out of latitude and longitude range
aerial["Takeoff Longitude"] = aerial["Takeoff Longitude"].apply(
    lambda x: re.sub(r"[A-Za-z]+", "", str(x)) if pd.notnull(x) else np.nan
)
aerial["Takeoff Latitude"] = aerial["Takeoff Latitude"].apply(
    lambda x: re.sub(r"[A-Za-z]+", "", str(x)) if pd.notnull(x) else np.nan
)

# Drop rows where 'Takeoff Latitude' is NaN or empty
aerial = aerial.dropna(subset=["Takeoff Latitude"])  # Drop NaN values
aerial = aerial[aerial["Takeoff Latitude"] != ""]   # Drop empty strings explicitly (if they exist)

# Drop rows where 'Takeoff Latitude' is NaN or empty
aerial = aerial.dropna(subset=["Takeoff Longitude"])  # Drop NaN values
aerial = aerial[aerial["Takeoff Longitude"] != ""]   # Drop empty strings explicitly (if they exist)

aerial["Takeoff Longitude"] = aerial["Takeoff Longitude"].apply(
    lambda x: float(x)
)
aerial["Takeoff Latitude"] = aerial["Takeoff Latitude"].apply(
    lambda x: float (x)
)

aerial["Target Longitude"] = aerial["Target Longitude"].apply(
    lambda x: re.sub(r"[A-Za-z]+", "", str(x)) if pd.notnull(x) else np.nan
)
aerial["Target Latitude"] = aerial["Target Latitude"].apply(
    lambda x: re.sub(r"[A-Za-z]+", "", str(x)) if pd.notnull(x) else np.nan
)

# Drop rows where 'Takeoff Latitude' is NaN or empty
aerial = aerial.dropna(subset=["Target Latitude"])  # Drop NaN values
aerial = aerial[aerial["Target Latitude"] != ""]   # Drop empty strings explicitly (if they exist)

# Drop rows where 'Takeoff Latitude' is NaN or empty
aerial = aerial.dropna(subset=["Target Longitude"])  # Drop NaN values
aerial = aerial[aerial["Target Longitude"] != ""]   # Drop empty strings explicitly (if they exist)

aerial["Target Longitude"] = aerial["Target Longitude"].apply(
    lambda x: float(x)
)
aerial["Target Latitude"] = aerial["Target Latitude"].apply(
    lambda x: float (x)
)

print ("ok")
ok
In [237]:
world = gpd.read_file(Path("ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"))
In [238]:
longitude_min, longitude_max = -30, 70  # Longitude range

latitude_min, latitude_max = -90, 90     # Latitude range

europe_north_africa = world.cx[-10:120, 0:90]  # Longitude: -30 to 60, Latitude: 15 to 50

europe_north_africa["bases"] = np.random.randint(0, 100, len(europe_north_africa))

europe_north_africa

fig, ax = plt.subplots(figsize=(15, 16))
aerial_euafrica_raids = aerial[
    (aerial["Takeoff Longitude"] >= longitude_min) & 
    (aerial["Takeoff Longitude"] <= longitude_max) &
    (aerial["Takeoff Latitude"] >= latitude_min) & 
    (aerial["Takeoff Latitude"] <= latitude_max)
]

aerial_euafrica_raids = aerial[
    (aerial["Target Longitude"] >= longitude_min) & 
    (aerial["Target Longitude"] <= longitude_max) &
    (aerial["Target Latitude"] >= latitude_min) & 
    (aerial["Target Latitude"] <= latitude_max)
]


europe_north_africa.plot(ax=ax, color="lightblue", edgecolor="black")

# Plot takeoff points as triangles
ax.scatter(
    aerial_euafrica_raids["Takeoff Longitude"], 
    aerial_euafrica_raids["Takeoff Latitude"], 
    color="blue", marker="o", s=50, label="Takeoff Points", zorder=3
)

ax.scatter(
    aerial_euafrica_raids["Target Longitude"], 
    aerial_euafrica_raids["Target Latitude"], 
    color="red", marker="o", s=50, label="Target Points", zorder=2
)


# Set title and limits
ax.set_title("Europe and North Africa Map with Takeoff Coordinates")
ax.set_xlim(-30, 60)  # Longitude range
ax.set_ylim(15, 90)   # Latitude range

# Add legend
europe_north_africa.plot(ax=ax, color="lightblue", edgecolor="black")
ax.set_xlim(-30, 60)  # Longitude range
ax.set_ylim(15, 90)   # Latitude range
ax.set_title("Europe Air Operations")

plt.legend()
# Show the plot
plt.show()
C:\Users\Arath\AppData\Roaming\Python\Python312\site-packages\geopandas\geodataframe.py:1819: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
No description has been provided for this image
In [ ]:
from matplotlib.animation import FuncAnimation
from matplotlib import animation
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib

# Initialize the plot
fig, ax = plt.subplots(figsize=(9, 8))
# Create a list of unique 'Year-Month' frames for the animation
f = aerial_euafrica_raids["Year-Month"].dropna().sort_values().unique()
# Generate a colormap for years
unique_years = aerial_euafrica_raids["Mission Date"].dt.year.dropna().unique()

colors_a = [matplotlib.colormaps["cool"] (x) for x in np.linspace(0, 1, len(unique_years))]
colors_b = [matplotlib.colormaps["inferno"] (x) for x in np.linspace(0, 1, len(unique_years))]

year_to_color_bases = {year: colors_a[i] for i, year in enumerate(unique_years)}
year_to_color_targets = {year: colors_b[i] for i, year in enumerate(unique_years)}

aerial_euafrica_raids["Takeoff Longitude"] = aerial_euafrica_raids["Takeoff Longitude"][
    aerial_euafrica_raids["Takeoff Longitude"] < 180
]

aerial_euafrica_raids["Takeoff Latitude"] = aerial_euafrica_raids["Takeoff Latitude"][
    aerial_euafrica_raids["Takeoff Latitude"] < 90
]

aerial_euafrica_raids.to_csv ("test.csv")

# Initialization function
def init():
    europe_north_africa.plot(ax=ax, color="gold", edgecolor="black", zorder=1)
    fig.patch.set_visible(False)  # Remove figure background
    ax.set_title("Mission Takeoff Animation")
    ax.set_xlim(-30, 60)
    ax.set_ylim(15, 90)
    ax.set_xticks([])  # Remove x-axis ticks
    ax.set_yticks([])  # Remove y-axis ticks
    ax.set_xlabel("")  # Remove x-axis label
    ax.set_ylabel("")  # Remove y-axis label
    return ax

# Animation function
def animate(frame):
    ax.clear()
    europe_north_africa.plot(ax=ax, color="gray", edgecolor="black", zorder=1)
    ax.set_xticks([])  # Remove x-axis ticks
    ax.set_yticks([])  # Remove y-axis ticks
    ax.set_xlabel("")  # Remove x-axis label
    ax.set_ylabel("")  # Remove y-axis label
    # Filter data for the current Year-Month
    current_data = aerial_euafrica_raids[aerial_euafrica_raids["Year-Month"] == frame]
    current_year = frame.year

    # Plot takeoff points with color based on Year
    ax.scatter(
        current_data["Takeoff Longitude"],
        current_data["Takeoff Latitude"],
        color=year_to_color_bases[current_year],
        marker="o",
        s=100,
        label=f"Base take off, Date: {frame}"
    )
    ax.scatter(
        current_data["Target Longitude"],
        current_data["Target Latitude"],
        color=year_to_color_targets[current_year],
        marker="o",
        s=100,
        label=f"Target location"
    )
    start_lon = current_data["Takeoff Longitude"]
    start_lat = current_data["Takeoff Latitude"] 

    end_lon = current_data["Target Longitude"]
    end_lat = current_data["Target Latitude"]

    ax.plot([start_lon, end_lon], [start_lat, end_lat], color="black", linestyle="--", linewidth=2)

    # Add titles and legend
    #ax.set_title(f"Takeoff Points - {frame}")
    ax.set_xlim(-30, 60)
    ax.set_ylim(15, 90)
    ax.legend(loc="upper right")
    return ax


# Display the animatio
C:\Users\Arath\AppData\Local\Temp\ipykernel_23028\3602433301.py:20: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aerial_euafrica_raids["Takeoff Longitude"] = aerial_euafrica_raids["Takeoff Longitude"][
C:\Users\Arath\AppData\Local\Temp\ipykernel_23028\3602433301.py:24: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aerial_euafrica_raids["Takeoff Latitude"] = aerial_euafrica_raids["Takeoff Latitude"][
No description has been provided for this image

Bombing Missions 1939 - 1945

In [ ]:
from IPython.display import HTML
from matplotlib import animation

anim = animation.FuncAnimation(fig, animate, init_func = init, frames = f, interval=200)
HTML (anim.to_jshtml())
Out[ ]:
No description has been provided for this image
In [ ]: